from pathlib import Path

import unicodedata

# Lines kept by the most recent deduplicate_by_field() call. The function
# replaces the contents on every call rather than appending to them.
unique_records = []


def deduplicate_by_field(input_file):
    """Write a copy of *input_file* with duplicate lines removed.

    Two lines count as duplicates when they are equal after stripping
    leading and trailing whitespace. The first occurrence of each line is
    kept, in its original order and with its original text (including the
    line ending). The result is written to the path ``input_file + "out"``.

    Args:
        input_file: Path of the UTF-8 text file to deduplicate, as a string.

    Side effects:
        Creates or overwrites ``input_file + "out"`` and replaces the contents
        of the module-level ``unique_records`` list with the kept lines.
    """
    seen = set()
    # Per-call accumulator: collecting straight into the module-level list
    # would leak lines from earlier calls into this call's output.
    kept = []

    with open(input_file, 'r', encoding='utf8') as f:
        # Stream the file line by line instead of loading it all at once.
        for record in f:
            stripped = record.strip()
            if stripped not in seen:
                seen.add(stripped)
                kept.append(record)  # keep the original line (including "\n")

    # Update the shared list in place so existing references to it see the
    # latest result.
    unique_records[:] = kept

    with open(input_file + "out", 'w', encoding='utf8') as f:
        f.writelines(kept)


def main():
    """Prompt for a file path, clean it up, and deduplicate that file."""
    raw_path = input("输入去重文件：")
    # Trim surrounding whitespace first, then bring the text to NFC form.
    normalized = unicodedata.normalize('NFC', raw_path.strip())
    # Drop leading/trailing Unicode directional marks (U+202A, U+202B,
    # U+200E, U+200F); presumably these sneak in when a path is pasted.
    cleaned_path = normalized.strip('\u202a\u202b\u200e\u200f')
    deduplicate_by_field(cleaned_path)

# Run the interactive prompt only when executed as a script, not on import.
if __name__ == "__main__":
    main()